package com.galeno.load

import org.apache.log4j.{Level, Logger}
import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}

/**
 * @Title: Operators — Spark RDD transformation examples
 * @Description: Demonstrates RDD creation, external-file reading, and a
 *               group-by average-salary aggregation.
 * @author galeno
 * @date 2021/08/25 11:58
 */
object 算子 {
  def main(args: Array[String]): Unit = {
    // Build RDDs from in-memory collections.
    // Logger.getLogger("org").setLevel(Level.ERROR)
    val conf = new SparkConf()
    conf.setMaster("local[*]").setAppName(this.getClass.getName)
    val sc = new SparkContext(conf)
    val rdd1 = sc.makeRDD(List(1, 2, 3, 4, 5, 6, 7, 8))
    val rdd2: RDD[Int] = sc.parallelize(List(1, 2, 3, 4, 5, 6, 7, 8, 9))
    //rdd1.collect().foreach(println)
    //println("*" * 50)
    // rdd2.collect().foreach(println)

    /**
     * Read an external file from HDFS. textFile is lazy: nothing is
     * actually read until an action is triggered on the resulting RDD.
     */
    val rdd3: RDD[String] = sc.textFile("hdfs://galeno01:8020/battel.txt")
    val words: RDD[String] = rdd3.flatMap(line => {
      line.split(",")
    })

    /**
     * Compute the average salary per age bracket. Input rows look like:
     *   1,zss,23,10000
     *   2,lss,24,30000
     *   3,wbb,34,10000
     *   4,DL,39,8000
     *   5,XG,35,8000
     * i.e. (id, name, age, salary).
     */
    val rddsal: RDD[String] = sc.textFile("data/sal")
    // Parse each CSV line into (id, name, age, salary).
    // BUG FIX: the original used part(2) for both the 2nd and 3rd tuple
    // slots, dropping the name column; the name is part(1).
    val res1: RDD[(String, String, String, Int)] = rddsal.map(line => {
      val part = line.split(",")
      (part(0), part(1), part(2), part(3).toInt)
    })
    // Group records by age bracket; x._3 is the age column.
    // NOTE(review): ages >= 40 also land in "30-40" and ages < 20 land in
    // "0" — preserved as-is; confirm intended bracket boundaries.
    val res2: RDD[(String, Iterable[(String, String, String, Int)])] = res1.groupBy(x => {
      val age = x._3.toInt
      if (age >= 20 && age < 30) {
        "20-30"
      } else if (age >= 30) {
        "30-40"
      } else {
        "0"
      }
    })
    // Average salary per bracket. Integer (truncating) division, matching
    // the original behavior.
    val avgSalaryByBracket: RDD[(String, Int)] = res2.mapValues(records => {
      val salaries = records.map(_._4)
      salaries.sum / salaries.size
    })
    // avgSalaryByBracket.collect().foreach(println)

    /**
     * sample
     */

    // Release the driver's Spark resources.
    sc.stop()
  }
}
